package com.ly.blog_data.service.impl;

import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import com.ly.blog_common.constant.DataConstant;
import com.ly.blog_data.dao.DataResultDao;
import com.ly.blog_data.entity.CatalogueEntity;
import com.ly.blog_data.entity.CatalogueRuleEntity;
import com.ly.blog_data.entity.ContentRuleEntity;
import com.ly.blog_data.entity.DataResultEntity;
import com.ly.blog_data.feign.ThirdService;
import com.ly.blog_data.parse.HtmlParserManager;
import com.ly.blog_data.parse.MsgList;
import com.ly.blog_data.service.HtmlService;
import com.ly.blog_data.utils.HtmlInfoExplainUtils;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.redis.core.RedisTemplate;
import org.springframework.stereotype.Service;

import java.text.DecimalFormat;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * @author ly create at 2021/6/22 - 13:41
 **/

@Slf4j
@Service
public class HtmlServiceImpl implements HtmlService {

    /** Formats the progress value pushed to Redis to four decimal places. */
    final DecimalFormat df = new DecimalFormat("######0.0000");

    @Autowired
    private DataResultDao dataResultDao;

    @Autowired
    private ThirdService thirdService;

    // NOTE(review): raw RedisTemplate — parameterizing (e.g. RedisTemplate<String, Object>)
    // would be type-safe, but depends on which template bean the context exposes; confirm first.
    @Autowired
    private RedisTemplate redisTemplate;

    /**
     * Crawls a catalogue page, skips entries whose content was already fetched
     * (a row with non-null {@code content} exists for the URL), then downloads
     * and parses every remaining content page. Progress (10 → 100) is published
     * to Redis under {@code DataConstant.TASK_PERCENTAGE}.
     *
     * @param catalogueURL        URL of the catalogue (listing) page
     * @param catalogueRuleEntity rules used to extract catalogue entries
     * @param contentRuleEntity   rules used to extract the content of each entry
     * @return the catalogue entries fetched in this run, with content populated
     */
    @Override
    public List<CatalogueEntity> catchData(String catalogueURL,
                                           CatalogueRuleEntity catalogueRuleEntity,
                                           ContentRuleEntity contentRuleEntity
    ) {

        // Download the raw HTML of the catalogue page.
        String catalogueURLHtmlContent = HtmlInfoExplainUtils.getHtmlContent(catalogueURL);

        // 1. Parse the catalogue; the substring up to the last '/' serves as the
        //    base URL for resolving relative entry links.
        List<CatalogueEntity> catalogueEntities = HtmlParserManager.explainCatalogue(
                catalogueURLHtmlContent,
                catalogueRuleEntity,
                catalogueURL.substring(0, catalogueURL.lastIndexOf("/")));

        // 1-2. Drop URLs that were already executed with non-null content.
        List<String> urlsAll = catalogueEntities.stream()
                .map(CatalogueEntity::getUrl)
                .collect(Collectors.toList());
        if (!urlsAll.isEmpty()) {
            // Guard: an empty collection would generate an invalid "IN ()" SQL clause.
            List<DataResultEntity> dataResultEntities = dataResultDao.selectList(
                    new QueryWrapper<DataResultEntity>().in("exe_url", urlsAll).isNotNull("content"));
            // Set lookup keeps the filter O(n) instead of the original O(n^2) List.contains.
            Set<String> urlsExed = dataResultEntities.stream()
                    .map(DataResultEntity::getExeUrl)
                    .collect(Collectors.toSet());
            catalogueEntities = catalogueEntities.stream()
                    .filter(item -> !urlsExed.contains(item.getUrl()))
                    .collect(Collectors.toList());
        }

        int len = catalogueEntities.size();
        if (len == 0) {
            log.info("【执行数据获取任务】 - 当前执行没有URL，存在URL获取失败或者重复执行。");
        } else {
            log.info("【执行数据获取任务】 - 开始执行URL。任务数据量：{}条；", len);
        }

        redisTemplate.opsForValue().set(DataConstant.TASK_PERCENTAGE, 10);

        // 2. Fetch and parse each content page, moving the progress from 10 to 100.
        for (int i = 0; i < len; i++) {
            CatalogueEntity catalogueEntity = catalogueEntities.get(i);
            String contentUrl = catalogueEntity.getUrl();
            String contentURLHtmlContent = HtmlInfoExplainUtils.getHtmlContent(contentUrl);
            String content = HtmlParserManager.explainContent(
                    contentURLHtmlContent, contentRuleEntity, contentUrl, thirdService);
            catalogueEntity.setContent(content);

            // Plain double division instead of the original string-round-trip boxing.
            double percent = (i + 1) / (double) len;
            redisTemplate.opsForValue().set(DataConstant.TASK_PERCENTAGE, df.format(10 + (90 * percent)));

            // Pause 1s between requests so the remote host does not block our IP.
            try {
                Thread.sleep(1000);
            } catch (InterruptedException e) {
                // Restore the interrupt flag and stop crawling instead of swallowing it.
                Thread.currentThread().interrupt();
                break;
            }
        }
        redisTemplate.opsForValue().set(DataConstant.TASK_PERCENTAGE, 100);

        return catalogueEntities;
    }

    /**
     * Dry-run validation of the crawl rules: parses the catalogue, then samples
     * roughly three of its entries (the index jumps by {@code len / 3} after each
     * pass, so small catalogues are checked in full) and records every step's
     * outcome in the returned message list.
     *
     * @param catalogueURL        URL of the catalogue (listing) page
     * @param catalogueRuleEntity rules used to extract catalogue entries
     * @param contentRuleEntity   rules used to extract the content of each entry
     * @return accumulated diagnostic messages describing the check result
     */
    @Override
    public MsgList<String> checkData(String catalogueURL, CatalogueRuleEntity catalogueRuleEntity, ContentRuleEntity contentRuleEntity) {
        // Download the raw HTML of the catalogue page.
        String catalogueURLHtmlContent = HtmlInfoExplainUtils.getHtmlContent(catalogueURL);

        MsgList<String> checkResult = new MsgList<>();
        // 1. Parse the catalogue, collecting diagnostics into checkResult.
        List<CatalogueEntity> catalogueEntities = HtmlParserManager.explainCatalogueCheck(
                catalogueURLHtmlContent,
                catalogueRuleEntity,
                catalogueURL.substring(0, catalogueURL.lastIndexOf("/")),
                checkResult);

        if (catalogueEntities == null || catalogueEntities.isEmpty()) {
            return checkResult;
        }
        checkResult.addd("解析目录成功 --- 长度：").addd(catalogueEntities.size());

        // 2. Sample-parse content pages (about three of them, see Javadoc).
        int len = catalogueEntities.size();
        for (int i = 0; i < len; i++) {
            CatalogueEntity catalogueEntity = catalogueEntities.get(i);
            String contentUrl = catalogueEntity.getUrl();
            String contentURLHtmlContent = HtmlInfoExplainUtils.getHtmlContent(contentUrl);
            String content = HtmlParserManager.explainContentCheck(
                    contentURLHtmlContent, contentRuleEntity, contentUrl, thirdService, checkResult);
            catalogueEntity.setContent(content);

            // Pause 1s between requests so the remote host does not block our IP.
            try {
                Thread.sleep(1000);
            } catch (InterruptedException e) {
                // Restore the interrupt flag and stop checking instead of swallowing it.
                Thread.currentThread().interrupt();
                break;
            }
            // Skip ahead so only ~3 entries are sampled per check run.
            i = i + len / 3;
        }

        return checkResult;
    }
}
